Analysis date: 2021-02-12
library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(ggbeeswarm)
library(DESeq2)
library(tidyverse)
library(limma)
library(MultiAssayExperiment)
library(gplots)
source("data/Figure_layouts.R")
load("data/CLL_Proteomics_Setup.RData")
load("data/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])
wilcox_proteins_any <- function(g, alteration, plot=FALSE, output=TRUE){
if("SNPs" %in% (experiments(multiomics_MAE[alteration,,]) %>% names()) ){
ty="SNPs_"}else if("chrom_abber" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
ty="chrom_abber_"}else if("health_record_bin" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
ty="health_record_bin_"}
dat_g <- wideFormat(multiomics_MAE[c(g , alteration),,] ) %>% as_tibble()
dat_g <- dat_g %>% mutate(alt =as.logical(get(paste0(ty, alteration), envir=as.environment(dat_g))))
if(grepl("ENSG", g)){
assay_data="RNAseq_norm_"
cap <- metadata(multiomics_MAE)$gene_symbol_mapping %>% filter(ensembl_gene_id==g) %>% .$hgnc_symbol
col_b <- "#92c5de"
col_r <- "#f4a582"
cap_y <- paste("Transcript abundance", cap)
}else{
assay_data="proteomics_"
cap <- g
col_b <- "#0571b0"
col_r <- "#ca0020"
cap_y <- paste("Protein abundance", cap)
}
wt <- dat_g %>% filter(alt==0) %>% .[,paste0(assay_data, g)] %>% unlist
mut <- dat_g %>% filter(alt==1) %>% .[,paste0(assay_data, g)] %>% unlist
alt_lab <- alteration
alt_lab <- gsub("_mutated", "", alt_lab)
wt_lab <- "wt"
mut_lab <- "mut"
if(alt_lab=="IGHV"){
wt_lab="U-CLL"
mut_lab="M-CLL"}
wx.test <- wilcox.test(wt, mut)
if(plot==TRUE){
p <- dat_g %>% filter(!is.na(alt )) %>%
ggplot(aes_string("alt", paste0(assay_data,g), fill="alt" )) +
geom_boxplot() + geom_beeswarm() +
xlab(alt_lab) +
scale_x_discrete(labels=c(wt_lab, mut_lab)) +
ylab(cap_y)+
pp_sra +
scale_fill_manual(values = c(col_b,col_r))
print(p+theme(aspect.ratio=2)+ theme(legend.position = 'none') +
stat_compare_means(method = "wilcox", label = "p.signif",
comparisons = list(c("TRUE","FALSE"))) )
print(wx.test$p.value)
}
if(output==TRUE){return(c(g, alteration, length(wt), length(mut), wx.test$p.value))
}else if(all(plot==TRUE, output=="plot")){return(p)}
}
ZAP70_P_plot <- wilcox_proteins_any(g="ZAP70", plot = TRUE, output = "plot", alteration="IGHV_mutated")
## [1] 1.198102e-07
ZAP70_R_plot <- wilcox_proteins_any(g="ENSG00000115085", plot = TRUE, output = "plot", alteration="IGHV_mutated")
## [1] 5.071175e-08
ATM_P_plot <- wilcox_proteins_any(g="ATM", plot = TRUE, output = "plot", alteration="ATM")
## [1] 0.001030721
ATM_R_plot <- wilcox_proteins_any(g="ENSG00000149311", plot = TRUE, output = "plot", alteration="ATM")
## [1] 0.1724928
TP53_P_plot <- wilcox_proteins_any(g="TP53", plot = TRUE, output = "plot", alteration="TP53")
## [1] 0.0001178339
TP53_R_plot <- wilcox_proteins_any(g="ENSG00000141510", plot = TRUE, output = "plot", alteration="TP53")
## [1] 0.005025578
SF3B1_P_plot <- wilcox_proteins_any(g="SF3B1", plot = TRUE, alteration="SF3B1", output = "plot")
## [1] 0.2489914
XPO1_P_plot <- wilcox_proteins_any(g="XPO1", plot = TRUE, output = "plot", alteration="XPO1")
## [1] 0.003788337
XPO1_R_plot <- wilcox_proteins_any(g="ENSG00000082898", plot = TRUE, output = "plot", alteration="XPO1")
## [1] 0.1821531
IGHM_P_plot <- wilcox_proteins_any(g="IGHM", plot = TRUE, alteration="IGHV_mutated", output = "plot")
## [1] 0.00160812
plot_chromosome_theme <- list(
coord_cartesian(ylim=c(-0.7,0.7)),
facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
ylab("log2 norm. protein abundance"),
xlab("Location gene for protein on chromosome [bp]")
)
Chr12_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_trisomy12),
chromosome_name %in% c("12")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("trisomy12") +
geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=133275309, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#0571b0", "#ca0020"), name = "",
labels = c("wt", "trisomy 12" ))
Chr12_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
Chr11_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_del11q),
chromosome_name %in% c("11")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("del11q")+
geom_rect(xmin = 97400000, ymin=-0.68, ymax=0.68, xmax=110600000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#0571b0", "#ca0020"), name = "",
labels = c("wt","del11q"))
Chr11_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
Chr13_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_del13q14),
chromosome_name %in% c("13")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("del13q")+
geom_rect(xmin = 47000000,ymin=-0.68, ymax=0.68, xmax=51000000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#0571b0", "#ca0020"), name = "",
labels = c("wt","del13q"))
Chr13_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
message("In the region in which the deletion occurs (47Mb-51Mb, 2011 Ouillette et al.) the data are probably very noisy as the number of proteins we detected in the affected region is small:")
## In the region in which the deletion occurs (47Mb-51Mb, 2011 Ouillette et al.) the data are probably very noisy as the number of proteins we detected in the affected region is small:
proteomics_tbl_meta_biomart %>%
filter( !is.na(value),
chromosome_name %in% c("13"), start_position>47000000, start_position< 51000000) %>% dplyr::select(rowname) %>% unique() %>% nrow()
## [1] 13
Chr17_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_del17p13 ),
chromosome_name %in% c("17")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("del17p")+
geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=10800000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#0571b0", "#ca0020"), name = "",
labels = c("wt","del17p"))
Chr17_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
Chr8_P_plot <- proteomics_tbl_meta_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_gain8q24 ),
chromosome_name %in% c("8")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
plot_chromosome_theme +
pp_sra +
ggtitle("gain8q24")+
geom_rect(xmin = 117700001, ymin=-0.68, ymax=0.68, xmax=146364022, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#0571b0", "#ca0020"), name = "",
labels = c("wt","gain8q"))
Chr8_P_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
plot_chromosome_theme_RNA <- list(
#coord_cartesian(ylim=c(0.95,1.1)),
coord_cartesian(ylim=c(0.95,1.05)),
facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
ylab("log10 norm. RNA counts"),
xlab("Location gene on chromosome [bp]")
)
RNA_biomart<- left_join(
left_join((longFormat(multiomics_MAE[,,"RNAseq_norm"] ) %>% as_tibble()),
metadata(multiomics_MAE)$gene_symbol_mapping[c(1,2,3,5:7)],
by=c("rowname"="ensembl_gene_id")),
proteomics_tbl_meta_biomart_chrab, by=c("primary"))
## harmonizing input:
## removing 531 sampleMap rows not in names(experiments)
## removing 22 colData rownames not in sampleMap 'primary'
RNA_biomart <- left_join( left_join(RNA_biomart,
proteomics_tbl_meta_biomart_SNP, by=c("primary")),
proteomics_tbl_meta_biomart_health, by=c("primary"))
RNA_biomart_mean <- RNA_biomart %>%
group_by(rowname) %>%
summarise( mean_value = mean(value, na.rm = TRUE) )
RNA_biomart <- left_join(
RNA_biomart,
RNA_biomart_mean ) %>%
mutate(value = value/mean_value)
## Joining, by = "rowname"
# Trisomy 12
Chr12_R_plot <- RNA_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_trisomy12),
chromosome_name %in% c("12")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("trisomy12") +
geom_rect(xmin = 0, ymin=0.95, ymax=1.05, xmax=133275309, color="gray40", size=1.5, fill=NA) + scale_color_manual(values=c("#92c5de", "#f4a582"), name = "",
labels = c("wt","trisomy 12"))
Chr12_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
# del11q
Chr11_R_plot <- RNA_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_del11q),
chromosome_name %in% c("11")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("del11q")+
geom_rect(xmin = 97400000, ymin=0.95, ymax=1.05, xmax=110600000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#92c5de", "#f4a582"), name = "",
labels = c("wt","del11q"))
Chr11_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
# del13q
Chr13_R_plot <- RNA_biomart %>%
filter( !is.na(value), !is.na( chrom_abber_del13q14),
chromosome_name %in% c("13")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("del13q")+
geom_rect(xmin = 47000000, ymin=0.95, ymax=1.05, xmax=51000000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#92c5de", "#f4a582"), name = "",
labels = c("wt","del13q"))
Chr13_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
# del17p
Chr17_R_plot <- RNA_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_del17p13),
chromosome_name %in% c("17")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("del17p")+
geom_rect(xmin = 0, ymin=0.95, ymax=1.05, xmax=10800000, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#92c5de", "#f4a582"), name = "",
labels = c("wt","del17p"))
Chr17_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
Chr8_R_plot <- RNA_biomart %>%
filter( !is.na(value), !is.na(chrom_abber_gain8q24),
chromosome_name %in% c("8")) %>%
ggplot(aes(start_position, value, group=primary)) +
geom_point(size=0.5, alpha=0.2, color="darkgrey") +
stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
plot_chromosome_theme_RNA +
pp_sra +
ggtitle("gain8q24")+
geom_rect(xmin = 117700001, ymin=0.95, ymax=1.05, xmax=146364022, color="gray40", size=1.5, fill=NA) +
scale_color_manual(values=c("#92c5de", "#f4a582"), name = "",
labels = c("wt","gain8q"))
Chr8_R_plot + theme(aspect.ratio=0.4, legend.position = 'none')
## `geom_smooth()` using formula 'y ~ x'
save(ZAP70_P_plot, ZAP70_R_plot, TP53_P_plot, TP53_R_plot, ATM_P_plot, ATM_R_plot, Chr12_P_plot, Chr17_P_plot,
Chr12_R_plot, Chr17_R_plot, SF3B1_P_plot, XPO1_P_plot, XPO1_R_plot, Chr13_P_plot, Chr11_P_plot,
Chr13_R_plot, Chr11_R_plot, Chr8_P_plot,Chr8_R_plot, IGHM_P_plot,
file = "RData_plots/CLL_Proteomics_Genetics_Plots.RData")
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gplots_3.1.1 MultiAssayExperiment_1.14.0
## [3] limma_3.44.3 forcats_0.5.1
## [5] stringr_1.4.0 dplyr_1.0.3
## [7] purrr_0.3.4 readr_1.4.0
## [9] tidyr_1.1.2 tibble_3.0.6
## [11] tidyverse_1.3.0 DESeq2_1.28.1
## [13] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [15] matrixStats_0.58.0 Biobase_2.48.0
## [17] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [19] IRanges_2.22.2 S4Vectors_0.26.1
## [21] BiocGenerics_0.34.0 ggbeeswarm_0.6.0
## [23] ggpubr_0.4.0 Hmisc_4.4-2
## [25] ggplot2_3.3.3 Formula_1.2-4
## [27] survival_3.2-7 lattice_0.20-41
## [29] Matrix_1.3-2 pheatmap_1.0.12
## [31] gtools_3.8.2 plyr_1.8.6
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 splines_4.0.2
## [4] BiocParallel_1.22.0 digest_0.6.27 htmltools_0.5.1.1
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.0 openxlsx_4.2.3 annotate_1.66.0
## [13] modelr_0.1.8 jpeg_0.1-8.1 colorspace_2.0-0
## [16] blob_1.2.1 rvest_0.3.6 haven_2.3.1
## [19] xfun_0.20 crayon_1.4.0 RCurl_1.98-1.2
## [22] jsonlite_1.7.2 genefilter_1.70.0 glue_1.4.2
## [25] gtable_0.3.0 zlibbioc_1.34.0 XVector_0.28.0
## [28] car_3.0-10 abind_1.4-5 scales_1.1.1
## [31] DBI_1.1.1 rstatix_0.6.0 Rcpp_1.0.6
## [34] xtable_1.8-4 htmlTable_2.1.0 foreign_0.8-81
## [37] bit_4.0.4 htmlwidgets_1.5.3 httr_1.4.2
## [40] RColorBrewer_1.1-2 ellipsis_0.3.1 pkgconfig_2.0.3
## [43] XML_3.99-0.5 farver_2.0.3 nnet_7.3-15
## [46] dbplyr_2.0.0 locfit_1.5-9.4 tidyselect_1.1.0
## [49] labeling_0.4.2 rlang_0.4.10 AnnotationDbi_1.50.3
## [52] munsell_0.5.0 cellranger_1.1.0 tools_4.0.2
## [55] cachem_1.0.1 cli_2.3.0 generics_0.1.0
## [58] RSQLite_2.2.3 broom_0.7.4 evaluate_0.14
## [61] fastmap_1.1.0 yaml_2.2.1 knitr_1.31
## [64] bit64_4.0.5 fs_1.5.0 zip_2.1.1
## [67] caTools_1.18.1 nlme_3.1-151 xml2_1.3.2
## [70] compiler_4.0.2 rstudioapi_0.13 beeswarm_0.2.3
## [73] curl_4.3 png_0.1-7 ggsignif_0.6.0
## [76] reprex_1.0.0 geneplotter_1.66.0 stringi_1.5.3
## [79] highr_0.8 vctrs_0.3.6 pillar_1.4.7
## [82] lifecycle_0.2.0 data.table_1.13.6 bitops_1.0-6
## [85] R6_2.5.0 latticeExtra_0.6-29 KernSmooth_2.23-18
## [88] gridExtra_2.3 rio_0.5.16 vipor_0.4.5
## [91] assertthat_0.2.1 withr_2.4.1 GenomeInfoDbData_1.2.3
## [94] mgcv_1.8-33 hms_1.0.0 grid_4.0.2
## [97] rpart_4.1-15 rmarkdown_2.6 carData_3.0-4
## [100] lubridate_1.7.9.2 base64enc_0.1-3